# -*- coding: utf-8 -*-

# 1.	居理新房页面源码存成.txt（4分）
# 2.	获取海淀楼盘名（8分）
# 3.	获取海淀楼盘地址（8分）
# 4.	获取海淀楼盘面积（8分）
# 5.	获取海淀楼盘开盘时间（8分）
# 6.	获取海淀楼盘是几居室（8分）
# 7.	获取海淀楼盘宣传图片（8分）
# 8.	获取每平米价格（8分）
# 9.	获取参考价 （8分）
# 10.	实现翻页抓取，抓取2页
# 1. 将获取楼盘名存成txt（8分）
# 2. 将爬取数据全部存入mysql数据库,并建立相应字段（8分）
# 3. 将爬取数据全部存入mongodb数据库（8分）

import scrapy
from ..items import JulixinfangItem


class JuliSpider(scrapy.Spider):
    """Scrape new-home listing cards from the julive.com project list.

    The first two result pages are requested, and one ``JulixinfangItem``
    is yielded for every listing card that has a name.
    """

    name = 'juli'
    # Must match the crawled site; a placeholder value would cause any
    # follow-up request yielded from a callback to be dropped as offsite.
    allowed_domains = ['julive.com']
    # Result pages 1 and 2. Built with a comprehension so that no loop
    # variables are left behind as class attributes.
    start_urls = [
        'http://www.julive.com/project/s/z' + str(page)
        for page in range(1, 3)
    ]

    # XPath selecting every listing card on a result page.
    _CARD_XPATH = (
        "//div[@class='house-list']"
        "/div[@class='house-item main_click_total']"
    )
    # XPath of the optional reference price inside a card.
    _REFERENCE_PRICE_XPATH = (
        ".//div[@class='developer']/span[@class='number']/text()"
    )
    # Item field -> XPath relative to one listing card.
    _FIELD_XPATHS = {
        # listing name
        'name': ".//h4[@class='title']/a[@class='name project-card-item']/text()",
        # address
        'dizhi': ".//span[@class='position-des']/a/text()",
        # floor area
        'mianji': ".//span[@class='area']/a/text()",
        # opening time
        'shijian': ".//p[@class='celling']/a[@class='project-card-item']/text()",
        # room layout
        'jushi': ".//div[@class='types']/a[@class='project-card-item']/text()",
        # promotional image URL (lazy-loaded, so read from data-original)
        'tupian': ".//img[@class='lazy']/@data-original",
        # price per square metre; the trailing space in the class value is
        # part of the site's markup and must be kept for the match
        'pingjunjia': ".//div[@class='total-price ']/span[@class='number']/text()",
    }

    def parse(self, response):
        """Yield one ``JulixinfangItem`` per listing card in *response*.

        Fields whose node is absent from a card are set to ``None`` instead
        of raising, so one incomplete card no longer aborts the rest of the
        page. Cards without a name are skipped and logged. When a card has
        no reference price, the per-square-metre price is used in its place.
        """
        for card in response.xpath(self._CARD_XPATH):
            # extract_first() returns None when nothing matches, whereas
            # indexing the selector list with [0] raises IndexError.
            fields = {
                key: card.xpath(path).extract_first()
                for key, path in self._FIELD_XPATHS.items()
            }
            if fields['name'] is None:
                self.logger.warning(
                    'Skipping listing without a name on %s', response.url
                )
                continue

            # Evaluate the reference-price query once; fall back to the
            # per-square-metre price when the card does not show one.
            cankaojia = card.xpath(self._REFERENCE_PRICE_XPATH).extract_first()
            if cankaojia is None:
                cankaojia = fields['pingjunjia']

            print(fields['name'])
            item = JulixinfangItem()
            for key, value in fields.items():
                item[key] = value
            item['cankaojia'] = cankaojia
            yield item
